#Replication code for 'Are Donations to Charity an Effective Incentive for Public Officials?'
library(MASS)
library(reshape2)
library(multiwayvcov)
library(lmtest)

#setwd('SET UP YOUR WORKING DIRECTORY HERE')
inc<-read.csv('incentives_rep.csv',stringsAsFactors = F)

####TO COMPUTE DISTANCES
#Range of possible answers by item
scale_1<-range(inc$resp_1c,na.rm=T)[2]-range(inc$resp_1c,na.rm=T)[1]
scale_2<-range(inc$resp_2c,na.rm=T)[2]-range(inc$resp_2c,na.rm=T)[1]
scale_3<-range(inc$resp_3c,na.rm=T)[2]-range(inc$resp_3c,na.rm=T)[1]
scale_4<-range(inc$resp_4c,na.rm=T)[2]-range(inc$resp_4c,na.rm=T)[1]
scale_5<-range(inc$resp_5c,na.rm=T)[2]-range(inc$resp_5c,na.rm=T)[1]
scale_6<-range(inc$resp_6c,na.rm=T)[2]-range(inc$resp_6c,na.rm=T)[1]
scale_7<-range(inc$resp_7c,na.rm=T)[2]-range(inc$resp_7c,na.rm=T)[1]
scale_8<-100
scale_0 <-range(inc$resp_0c,na.rm=T)[2]-range(inc$resp_0c,na.rm=T)[1]

#Individual answers recoded to vary from 0 to 1
inc$score1c <- (abs(inc$resp_1c-3.6)/scale_1)
inc$score2c <- (abs(inc$resp_2c-17)/scale_2)
inc$score3c <- (abs(inc$resp_3c-51.06)/scale_3)
inc$score4c <- (abs(inc$resp_4c-9.9)/scale_4)
inc$score5c <- (abs(inc$resp_5c-7.9)/scale_5)
inc$score6c <- (abs(inc$resp_6c-1.1)/scale_6)
inc$score7c <- (abs(inc$resp_7c-4486)/scale_7)
inc$score8c <- (abs(inc$resp_8c-12.92)/scale_8)
inc$score0c <- (abs(inc$resp_0c+2.3)/scale_0)

#Data frame with the score distances by respondent
responses<-cbind(inc$score1c,inc$score2c,inc$score3c,
                 inc$score4c,inc$score5c,inc$score6c,inc$score7c,
                 inc$score8c)
#Number of questions answered
inc$items.answered<-(8-rowSums(is.na(responses)))
#Average distance by respondent
inc$ave_dist <- (rowSums(responses,na.rm=TRUE))/ inc$items.answered


################################
#TABLE 1 - Main results
################################
mod1.1<-lm(ave_dist ~ money_self + money_charity, 
              inc)
summary(mod1.1)


inc$gamble<-as.factor(inc$gamble)
mod1.2<-polr(gamble~money_self + money_charity,
                 method="probit",inc)
summary(mod1.2)


#################################
#TABLE A1 - Participants by state
#################################
# NCSL
table(inc$state[inc$Exp==0])
# NLC
table(inc$state[inc$Exp==1])

#################################
#TABLE A2 - Demographics
#################################
# Gender
inc$female<-inc$gender-1
prop.table(table(inc$female[inc$Exp==0]))
prop.table(table(inc$female[inc$Exp==1]))
prop.table(table(inc$female))
# Age
summary(inc$age[inc$Exp==0])
summary(inc$age[inc$Exp==1])
summary(inc$age)
# Higher Education
prop.table(table(inc$education[inc$Exp==0]))[6]+prop.table(table(inc$education[inc$Exp==0]))[5]
prop.table(table(inc$education[inc$Exp==1]))[6]+prop.table(table(inc$education[inc$Exp==1]))[5]
prop.table(table(inc$education))[6]+prop.table(table(inc$education))[5]
# Elected Official
inc$elect<-ifelse(inc$elected_NLC==1|inc$position_NCSL==1,1,0)
inc$elect[is.na(inc$elect)]<-0
prop.table(table(inc$elect[inc$Exp==0]))
prop.table(table(inc$elect[inc$Exp==1]))
prop.table(table(inc$elect))
# Democrat
prop.table(table(inc$democrat[inc$Exp==0]))
prop.table(table(inc$democrat[inc$Exp==1]))
prop.table(table(inc$democrat))
# Republican
prop.table(table(inc$republican[inc$Exp==0]))
prop.table(table(inc$republican[inc$Exp==1]))
prop.table(table(inc$republican))
# In Majority (NCSL Only)
prop.table(table(inc$maj[inc$Exp==0]))  #Variable=1 if in majority 
# Non-partisan Elections (NLC Only)
inc$nonpart_elec<-inc$nonpart_elec-1
prop.table(table(inc$nonpart_elec))

#################################
#TABLE A3 - Covariate Balance
#################################
#NCSL
summary(glm(money_self~female+age+education+elect+democrat+republican, family='binomial'(link='probit'), 
            inc[inc$Exp==0&(inc$control==1|inc$money_self==1),]))
summary(glm(money_charity~female+age+education+elect+democrat+republican, family='binomial'(link='probit'), 
            inc[inc$Exp==0&(inc$control==1|inc$money_charity==1),]))
summary(glm(money_charity~female+age+education+elect+democrat+republican, family='binomial'(link='probit'), 
            inc[inc$Exp==0&(inc$money_charity==1|inc$money_self==1),]))
#NLC
summary(glm(money_self~female+age+education+elect+democrat+republican, family='binomial'(link='probit'), 
            inc[inc$Exp==1&(inc$control==1|inc$money_self==1),]))
summary(glm(money_charity~female+age+education+elect+democrat+republican, family='binomial'(link='probit'), 
            inc[inc$Exp==1&(inc$control==1|inc$money_charity==1),]))
summary(glm(money_charity~female+age+education+elect+democrat+republican, family='binomial'(link='probit'), 
            inc[inc$Exp==1&(inc$money_charity==1|inc$money_self==1),]))


#################################
#TABLE A4 - Average responses (Ns)
#################################
# Means
means_NCSL<-NULL; ns_NCSL<-NULL
for (i in 0:8){
  means_NCSL<-c(means_NCSL,
                round(mean(na.omit(inc[[paste('resp_', i, 'c', sep='')]][inc$Exp==0])),2))
  ns_NCSL<-c(ns_NCSL,
             length(na.omit(inc[[paste('resp_', i, 'c', sep='')]][inc$Exp==0])))
  }
means_NCSL
ns_NCSL

means_NLC<-NULL; ns_NLC<-NULL
for (i in 1:8){
  means_NLC<-c(means_NLC,
                round(mean(na.omit(inc[[paste('resp_', i, 'c', sep='')]][inc$Exp==1])),2))
  ns_NLC<-c(ns_NLC,
             length(na.omit(inc[[paste('resp_', i, 'c', sep='')]][inc$Exp==1])))
}
means_NLC
ns_NLC


##################################################################
#TABLE A5 - # of items answered by treatment assignment
##################################################################
mod.totalanswered <-lm(total_answered ~ money_self + money_charity, inc)
summary(mod.totalanswered)



#################################
#TABLE A6 - Main results, by item
#################################
#Individual answers recoded to vary from 0 to 1
inc$score1 <- (inc$resp_1c+4)/scale_1
inc$score2 <- (inc$resp_2c-3)/scale_2
inc$score3 <- (inc$resp_3c-50)/scale_3
inc$score4 <- (inc$resp_4c-9)/scale_4
inc$score5 <- (inc$resp_5c-3)/scale_5
inc$score6 <- (inc$resp_6c+2)/scale_6
inc$score7 <- (inc$resp_7c-1000)/scale_7
inc$score8 <- (inc$resp_8c)/scale_8
inc$score0 <- (inc$resp_0c)/scale_0

#Creating new data frame where observation = item response
scores.all<-melt(inc[,(ncol(inc)-8):ncol(inc)])
df.scores<-data.frame(rep(1:474,9),scores.all,rep(inc$money_self,9),
                      rep(inc$money_charity,9), rep(inc$Exp,9))
names(df.scores)<-c("id","question","score","money_self","money_charity",
                    "Exp")


#Correct answers rescaled to vary from 0 to 1
truth<-c(3.6,17,51.06,9.9,7.9,1.1,4486,12.92)
truth.score1<-(3.6+4)/scale_1
truth.score2<-(17-3)/scale_2
truth.score3<-(51.06-50)/scale_3
truth.score4<-(9.9-9)/scale_4
truth.score5<-(7.9-3)/scale_5
truth.score6<-(1.1+2)/scale_6
truth.score7<-(4486-1000)/scale_7
truth.score8<-(12.92)/scale_8
truth.score0<-(-2.3+4)/scale_0
truth.scores<-c(truth.score1,truth.score2,truth.score3,truth.score4,
                truth.score5,truth.score6,truth.score7,truth.score8,truth.score0)

#Building the outcome variable distance
for (i in 1:dim(df.scores)[1]){
  df.scores$item[i]<-which(unique(df.scores$question)==df.scores$question[i])  
  df.scores$truth[i]<-truth.scores[df.scores$item[i]]
}
df.scores$dist<-abs(df.scores$score-df.scores$truth)


#Model
mod.dist.new<-lm(dist ~ money_self + money_charity +
                   question, df.scores[df.scores$item<9,])
mod.dist.vcovCL<-cluster.vcov(mod.dist.new, df.scores$id[df.scores$item<9]) #Adding Clustered Std Errors

#The results with clustered SEs
coeftest(mod.dist.new,mod.dist.vcovCL)
